


#####################################################
###### 03_descriptive_graphs
###### Graphs summarizing descriptive differences 
###### across groups
#####################################################


#####################################################
###### Packages and imports
#####################################################

rm(list = ls())
library(dplyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(here)
library(viridis)
library(ggpubr)
library(zoo)
library(ggalt)
library(stringr)

## source plotting theme
source(here("code/00_utils.R"))
FIG_DIR <- here("output/figs/")

#####################################################
###### Sample characteristics
#####################################################

## Load data
df_all <- read.csv(here("data/analytic_df.csv"))

## Stack the weighted demographic proportions for one sample into a single
## data frame. `weighted_prop_summaries()` is sourced from code/00_utils.R.
## NOTE(review): the code below relies on it returning columns `dem`,
## `level`, and `prop` -- confirm against 00_utils.R.
summarise_sample_dems <- function(data) {
  rbind.data.frame(
    weighted_prop_summaries(data, "derived_race", "Race/eth"),
    weighted_prop_summaries(data, "derived_gender_cat", "Gender"),
    weighted_prop_summaries(data, "derived_pol", "Political\naffil"),
    weighted_prop_summaries(data, "derived_educ_3cat", "Educ")
  ) %>%
    group_by(dem) %>%
    # x-axis factor: levels reordered by proportion within each demographic
    mutate(dem_level = reorder(level, prop)) %>%
    ungroup()
}

## Bar chart of weighted proportions, one bar per demographic level, colored
## by demographic. The fill legend is switched off, so no legend position or
## legend title is set (they would have no visible effect).
plot_sample_dems <- function(dem_summary) {
  ggplot(dem_summary,
         aes(x = dem_level, y = prop, group = dem, fill = dem)) +
    geom_col(position = "dodge", color = "black") +
    # value labels reuse the x / y / group mapping of the bars
    geom_label(aes(label = round(prop, 2)),
               position = position_dodge(width = 1),
               fill = "white", size = 6) +
    scale_fill_viridis_d() +
    # NOTE: ylim() drops any bar whose proportion exceeds 0.7
    ylim(0, 0.7) +
    labs(x = "", y = "Weighted proportion\nof sample") +
    guides(fill = "none") +
    theme_new(base_size = 24) +
    theme(axis.text.x = element_text(angle = 90, size = 14),
          plot.margin = unit(c(0.5, 1, 0.5, 0.5), "cm"))
}

## Full sample
df_analytic <- df_all
sample_dem_summary <- summarise_sample_dems(df_analytic)
dem_plot_fullsample <- plot_sample_dems(sample_dem_summary)

ggsave(paste0(FIG_DIR, "dem_prop_fullsample.pdf"),
       plot = dem_plot_fullsample,
       device = "pdf",
       width = 12,
       height = 6)

## Parents only; df_analytic stays restricted to parents for the rest of
## the script.
df_analytic <- df_all %>%
  filter(!derived_parent %in% c("Never parent", "Other"))
sample_dem_summary <- summarise_sample_dems(df_analytic)
dem_plot_parents <- plot_sample_dems(sample_dem_summary)

ggsave(paste0(FIG_DIR, "dem_prop_parents.pdf"),
       plot = dem_plot_parents,
       device = "pdf",
       width = 12,
       height = 6)


#####################################################
###### Overall fairness ratings 
#####################################################

## df_analytic is still restricted to parents here.

## Fairness summaries by status quo condition:
##   prop_morefair       - weighted share with derived_alg_morefair
##   continuous_morefair - weighted mean of the continuous rating (NAs dropped)
##   se_* / lower_* / upper_* - standard errors and point estimate +/- one SE
## NOTE(review): the point estimates use derived_comb_weight but the standard
## errors are unweighted -- confirm this is intended.
method_fair_compare <- df_analytic %>%
  group_by(derived_statusquo_cond) %>%
  summarise(
    prop_morefair = weighted.mean(derived_alg_morefair,
                                  w = derived_comb_weight),
    continuous_morefair = weighted.mean(derived_continuous_morefair,
                                        w = derived_comb_weight,
                                        na.rm = TRUE),
    se_prop = sqrt(prop_morefair * (1 - prop_morefair) / n()),
    # The mean and sd of the continuous rating drop missing values, so the
    # SE denominator must count non-missing ratings rather than all rows.
    n_continuous = sum(!is.na(derived_continuous_morefair)),
    se_mean = sd(derived_continuous_morefair, na.rm = TRUE) /
      sqrt(n_continuous),
    lower_prop = prop_morefair - se_prop,
    upper_prop = prop_morefair + se_prop,
    lower_mean = continuous_morefair - se_mean,
    upper_mean = continuous_morefair + se_mean
  ) %>%
  ungroup()


## Binary outcome: weighted share rating the algorithm as fairer, one
## horizontal bar per status quo condition, error bars at +/- one SE.
binary_compare <- ggplot(
  method_fair_compare,
  aes(x = factor(derived_statusquo_cond,
                 levels = c("Parent requests",
                            "Counselor discretion",
                            "Simple rule",
                            "Weighted lottery")),
      y = prop_morefair)
) +
  geom_col(position = "dodge", fill = "wheat4", color = "black") +
  geom_errorbar(aes(ymin = lower_prop, ymax = upper_prop),
                position = position_dodge(width = 1),
                width = 0.2) +
  # white value label; hjust > 1 pulls it back inside the bar
  geom_label(aes(x = factor(derived_statusquo_cond),
                 label = round(prop_morefair, 2)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.25, size = 8) +
  coord_flip() +
  labs(x = "",
       y = "Proportion who think algorithm\n is fairer than\nstatus quo method") +
  theme_new(base_size = 16) +
  theme(axis.text.y = element_text(hjust = 1),
        axis.title.x = element_text(size = 14))

## binary_compare is the most recently built plot, so it is passed directly
ggsave(filename = paste0(FIG_DIR, "overall_pref_paronly.pdf"),
       plot = binary_compare,
       device = "pdf",
       width = 12,
       height = 8)

## Continuous outcome: weighted mean rating per status quo condition. The
## y-axis text is hidden because this panel sits to the right of
## binary_compare in the combined figure.
cont_compare <- ggplot(
  method_fair_compare,
  aes(x = factor(derived_statusquo_cond,
                 levels = c("Parent requests",
                            "Counselor discretion",
                            "Simple rule",
                            "Weighted lottery")),
      y = continuous_morefair)
) +
  geom_col(position = "dodge", fill = "wheat4", color = "black") +
  geom_errorbar(aes(ymin = lower_mean, ymax = upper_mean),
                position = position_dodge(width = 1),
                width = 0.2) +
  geom_label(aes(x = factor(derived_statusquo_cond),
                 label = round(continuous_morefair, 2)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.25, size = 8) +
  coord_flip() +
  labs(x = "",
       y = "Continuous rating of algorithm\nas fairer\n(higher: alg. definitely more fair)") +
  theme_new(base_size = 16) +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(size = 14))


## Side-by-side layout; the left panel is wider since it carries the labels
g_combined <- grid.arrange(grobs = list(binary_compare, cont_compare),
                           ncol = 2,
                           widths = c(1.5, 1))

ggsave(filename = paste0(FIG_DIR, "overall_pref_binary_continuous.pdf"),
       plot = g_combined,
       device = "pdf",
       width = 12,
       height = 8)



#####################################################
###### Between-group heterogeneity - main text figures
#####################################################

## Forms of heterogeneity examined below:

### 1. Education / cultural capital
## Weighted share rating the algorithm as fairer, by education level and
## status quo condition, with +/- one SE bounds. The grouping column is
## named `category` directly so it can be stacked with other demographics.
method_fair_compare_educ <- df_analytic %>%
  group_by(category = derived_educ_3cat, derived_statusquo_cond) %>%
  summarise(
    prop_algmorefair = weighted.mean(derived_alg_morefair,
                                     w = derived_comb_weight),
    se_prop = (prop_algmorefair * (1 - prop_algmorefair) / n())^0.5,
    lower = prop_algmorefair - se_prop,
    upper = prop_algmorefair + se_prop
  ) %>%
  ungroup()

### 2. Political ideology
## Same summary by ideology category, then relabelled to three simple
## categories. Rows whose label matches none of the patterns get NA and are
## dropped.
## NOTE(review): the relabelling happens after aggregation, so if several
## raw categories map to the same simple label there will be more than one
## row per (category, condition) -- confirm the raw categories are
## one-to-one.
method_fair_compare_pol <- df_analytic %>%
  group_by(derived_polideo_category, derived_statusquo_cond) %>%
  summarise(
    prop_algmorefair = weighted.mean(derived_alg_morefair,
                                     w = derived_comb_weight),
    se_prop = (prop_algmorefair * (1 - prop_algmorefair) / n())^0.5,
    lower = prop_algmorefair - se_prop,
    upper = prop_algmorefair + se_prop
  ) %>%
  ungroup() %>%
  mutate(
    category = case_when(
      grepl("liberal", derived_polideo_category) ~ "Liberal",
      grepl("conservative", derived_polideo_category) ~ "Conservative",
      grepl("Moderate", derived_polideo_category) ~ "Moderate"
    )
  ) %>%
  filter(!is.na(category)) %>%
  select(-derived_polideo_category)

## Stack the education and ideology summaries, tagging each with its
## demographic type; combined_cat is "<condition>: <demographic type>".
method_fair_compare_educpol <- rbind.data.frame(
  mutate(method_fair_compare_educ, dem_type = "Education"),
  mutate(method_fair_compare_pol, dem_type = "Political ideology")
) %>%
  mutate(combined_cat = paste0(derived_statusquo_cond, ": ", dem_type))

## Display order of education categories
cat_order_educ <- c("HS or less",
                    "Some college",
                    "College or\nprofessional school")

## Education panel: one facet per status quo condition, one horizontal bar
## per education level (ordered by cat_order_educ), error bars at +/- one SE.
educ_graph <- ggplot(
  filter(method_fair_compare_educpol, dem_type == "Education"),
  aes(x = factor(category, levels = cat_order_educ, ordered = TRUE),
      y = prop_algmorefair,
      group = factor(category, levels = cat_order_educ, ordered = TRUE),
      fill = category)
) +
  geom_col(position = "dodge", color = "black") +
  # value labels reuse the x / y / group mapping of the bars
  geom_label(aes(label = round(prop_algmorefair, 2)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.45, size = 6.5) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(width = 1),
                width = 0.2) +
  facet_wrap(~derived_statusquo_cond, scales = "free_y", ncol = 1) +
  coord_flip() +
  # shared palette for education and ideology; only the education
  # entries appear in this panel's legend (see breaks)
  scale_fill_manual(
    values = c(
      setNames(friendly_pal("vibrant_seven")[c(1, 3, 5)],
               c("HS or less",
                 "Some college",
                 "College or\nprofessional school")),
      setNames(friendly_pal("contrast_three")[c(2, 3, 1)],
               c("Conservative", "Moderate", "Liberal"))
    ),
    breaks = c("HS or less",
               "Some college",
               "College or\nprofessional school")
  ) +
  guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
  labs(x = "",
       y = "Proportion who think algorithm is\nfairer than status quo method",
       fill = "",
       title = "By Education") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 16))

## Display order of ideology categories
cat_order_partisan <- c("Conservative", "Moderate", "Liberal")

## Ideology panel: same layout as the education panel, ordered by
## cat_order_partisan.
partisan_graph <- ggplot(
  filter(method_fair_compare_educpol, dem_type == "Political ideology"),
  aes(x = factor(category, levels = cat_order_partisan, ordered = TRUE),
      y = prop_algmorefair,
      group = factor(category, levels = cat_order_partisan, ordered = TRUE),
      fill = category)
) +
  geom_col(position = "dodge", color = "black") +
  # value labels reuse the x / y / group mapping of the bars
  geom_label(aes(label = round(prop_algmorefair, 2)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.45, size = 6.5) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(width = 1),
                width = 0.2) +
  facet_wrap(~derived_statusquo_cond, scales = "free_y", ncol = 1) +
  coord_flip() +
  # shared palette for education and ideology; only the ideology
  # entries appear in this panel's legend (see breaks)
  scale_fill_manual(
    values = c(
      setNames(friendly_pal("vibrant_seven")[c(1, 3, 5)],
               c("HS or less",
                 "Some college",
                 "College or\nprofessional school")),
      setNames(friendly_pal("contrast_three")[c(2, 3, 1)],
               c("Conservative", "Moderate", "Liberal"))
    ),
    breaks = c("Conservative", "Moderate", "Liberal")
  ) +
  guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
  labs(x = "",
       y = "Proportion who think algorithm is\nfairer than status quo method",
       fill = "",
       title = "By Political Ideology") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 16))

## Arrange the two panels side by side and save them as one figure.
## ggsave() picks the arranged figure up through last_plot().
ggarrange(educ_graph, partisan_graph,
          ncol = 2,
          align = "h")

ggsave(filename = paste0(FIG_DIR, "educ_partisanship_compare.pdf"),
       plot = last_plot(),
       device = "pdf",
       width = 13,
       height = 9)


#####################################################
###### Supplement demographic plots 
#####################################################

## Weighted share rating the algorithm as fairer, by race/ethnicity and
## status quo condition, with +/- one SE bounds.
method_fair_compare_race <- df_analytic %>%
  group_by(category = derived_race, derived_statusquo_cond) %>%
  summarise(
    prop_algmorefair = weighted.mean(derived_alg_morefair,
                                     w = derived_comb_weight),
    se_prop = (prop_algmorefair * (1 - prop_algmorefair) / n())^0.5,
    lower = prop_algmorefair - se_prop,
    upper = prop_algmorefair + se_prop
  ) %>%
  ungroup()

## Race/ethnicity figure: one facet per status quo condition, one horizontal
## bar per group. The plot is not assigned; ggsave() picks it up through
## last_plot().
ggplot(
  method_fair_compare_race,
  aes(x = category, y = prop_algmorefair, group = category, fill = category)
) +
  geom_col(position = "dodge", color = "black") +
  # value labels reuse the x / y / group mapping of the bars
  geom_label(aes(label = round(prop_algmorefair, 2)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 2.95, size = 6.5) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(width = 1),
                width = 0.2) +
  facet_wrap(~derived_statusquo_cond, scales = "free_y", ncol = 1) +
  coord_flip() +
  scale_fill_manual(values = friendly_pal("vibrant_seven")) +
  guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
  labs(x = "",
       y = "Proportion who think algorithm is\nfairer than status quo method",
       fill = "") +
  theme_new(base_size = 14) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 14))

ggsave(filename = paste0(FIG_DIR, "race_compare.pdf"),
       plot = last_plot(),
       device = "pdf",
       width = 14,
       height = 9)



## Weighted share rating the algorithm as fairer, by party affiliation and
## status quo condition, with +/- one SE bounds.
method_fair_compare_partisan <- df_analytic %>%
  group_by(category = derived_pol, derived_statusquo_cond) %>%
  summarise(
    prop_algmorefair = weighted.mean(derived_alg_morefair,
                                     w = derived_comb_weight),
    se_prop = (prop_algmorefair * (1 - prop_algmorefair) / n())^0.5,
    lower = prop_algmorefair - se_prop,
    upper = prop_algmorefair + se_prop
  ) %>%
  ungroup()

## Display order of party categories
cat_order_party <- c("Republican", "Independent", "Democrat")


## Party-affiliation figure, excluding the "Unknown" category (the != filter
## also drops rows with a missing category). The plot is not assigned;
## ggsave() picks it up through last_plot().
ggplot(
  filter(method_fair_compare_partisan, category != "Unknown"),
  aes(x = factor(category, levels = cat_order_party, ordered = TRUE),
      y = prop_algmorefair,
      group = category,
      fill = factor(category, levels = cat_order_party, ordered = TRUE))
) +
  geom_col(position = "dodge", color = "black") +
  # labels inherit x / y; the grouping is the ordered factor, as before
  geom_label(aes(label = round(prop_algmorefair, 2),
                 group = factor(category,
                                levels = cat_order_party,
                                ordered = TRUE)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.95, size = 6.5) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(width = 1),
                width = 0.2) +
  facet_wrap(~derived_statusquo_cond, scales = "free_y", ncol = 1) +
  coord_flip() +
  scale_fill_manual(
    values = setNames(friendly_pal("contrast_three")[c(2, 3, 1)],
                      c("Republican", "Independent", "Democrat"))
  ) +
  guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
  labs(x = "",
       y = "Proportion who think algorithm is\nfairer than status quo method",
       fill = "") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 18))


ggsave(filename = paste0(FIG_DIR, "partisan_compare.pdf"),
       plot = last_plot(),
       device = "pdf",
       width = 13,
       height = 9)

## Weighted share rating the algorithm as fairer, by income bracket and
## status quo condition, with +/- one SE bounds.
method_fair_compare_income <- df_analytic %>%
  group_by(category = derived_income, derived_statusquo_cond) %>%
  summarise(
    prop_algmorefair = weighted.mean(derived_alg_morefair,
                                     w = derived_comb_weight),
    se_prop = (prop_algmorefair * (1 - prop_algmorefair) / n())^0.5,
    lower = prop_algmorefair - se_prop,
    upper = prop_algmorefair + se_prop
  ) %>%
  ungroup()

## Display order of income brackets, lowest to highest.
## NOTE(review): these strings must match the values of derived_income
## exactly; any bracket not listed here becomes NA in the plot -- confirm.
cat_order_income <- c("Income: <$30,000",
                      "Income: $30-$60,000",
                      "Income: $60-$100,000",
                      "Income: >$100,000+")


## Income figure: one facet per status quo condition, one horizontal bar per
## income bracket (ordered by cat_order_income). The plot is not assigned;
## ggsave() picks it up through last_plot().
ggplot(
  method_fair_compare_income,
  aes(x = factor(category, levels = cat_order_income, ordered = TRUE),
      y = prop_algmorefair,
      group = category,
      fill = factor(category, levels = cat_order_income, ordered = TRUE))
) +
  geom_col(position = "dodge", color = "black") +
  # labels inherit x / y; the grouping is the ordered factor, as before
  geom_label(aes(label = round(prop_algmorefair, 2),
                 group = factor(category,
                                levels = cat_order_income,
                                ordered = TRUE)),
             position = position_dodge(width = 0.5),
             color = "white", fill = NA, label.size = 0,
             hjust = 1.85, size = 6.5) +
  geom_errorbar(aes(ymin = lower, ymax = upper),
                position = position_dodge(width = 1),
                width = 0.2) +
  facet_wrap(~derived_statusquo_cond, scales = "free_y", ncol = 1) +
  coord_flip() +
  scale_fill_manual(values = friendly_pal("vibrant_seven")) +
  guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
  labs(x = "",
       y = "Proportion who think algorithm is\nfairer than status quo method",
       fill = "") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 18))

ggsave(filename = paste0(FIG_DIR, "income_compare.pdf"),
       plot = last_plot(),
       device = "pdf",
       width = 13,
       height = 9)

#####################################################
###### Summarizing qual patterns 
#####################################################

# Open-text responses classified by an LLM.
# These are later combined with the hand-coded observations, so the LLM
# prediction columns are recoded here to match the hand-coded variable
# names and values.
llm_coded_in <- readRDS(here("data/llm_results_unlab.RDS")) %>%
  mutate(
    # 0/1 salience predictions -> "Salient" / "Not salient" labels
    salience_targeting = ifelse(salience_target_prediction == 1,
                                "Salient", "Not salient"),
    salience_impersonal_personal = ifelse(salience_impersonal_prediction == 1,
                                          "Salient", "Not salient"),
    # logical flags for the "good" valence of each dimension
    valence_impersonalgood_bin =
      valence_impersonal_prediction == "Impersonal is good",
    valence_targetgood_bin =
      valence_target_prediction == "More targeting is good"
  ) %>%
  # Attach each respondent's choice and condition. df_analytic holds parents
  # only, so unmatched cases get NA in the joined columns.
  left_join(select(df_analytic,
                   CaseId, derived_alg_morefair, derived_statusquo_cond),
            by = "CaseId") %>%
  rename(condition = derived_statusquo_cond,
         valence_impersonalgood_combined = valence_impersonal_prediction,
         valence_targetgood_combined = valence_target_prediction) %>%
  mutate(choice = ifelse(derived_alg_morefair,
                         "Algorithm", "Status quo method"))

# Missing-value count per column (inspection only)
colSums(is.na(llm_coded_in))

# Hand-coded observations
combined_qual <- read.csv(here("data/fr_cleaned_10212025.csv"))

## Keep only rows marked as coded, then derive:
##   condition / choice - parsed from limited_cell
##   valence_*_combined - collapsed valence label; "N/A" when the response
##                        was inadequate to code or the dimension was not
##                        salient
##   valence_*_bin      - logical flag for the "good" valence
combined_qual_valid <- combined_qual %>%
  filter(coded. == "Yes") %>%
  mutate(
    condition = gsub("\\_algfairer|\\_otherfairer", "", limited_cell),
    choice = ifelse(grepl("algfairer", limited_cell),
                    "Algorithm", "Status quo method"),
    inadequate_response_tocode_bin =
      as.numeric(inadequate_response_tocode == "Yes")
  ) %>%
  mutate(
    valence_impersonalgood_combined = case_when(
      inadequate_response_tocode_bin == 1 |
        salience_impersonal_personal == "Not salient" ~ "N/A",
      valence_impersonal_v_personal == "Other (salient but unclear)" ~
        "Other (salient but unclear)",
      valence_impersonal_v_personal %in%
        c("Impersonal is good",
          "Personal is bad",
          "Personal is bad + impersonal is good") ~ "Impersonal is good",
      # every remaining case is coded as the opposite valence
      TRUE ~ "Impersonal is bad"
    ),
    valence_targetgood_combined = case_when(
      inadequate_response_tocode_bin == 1 |
        salience_targeting == "Not salient" ~ "N/A",
      valence_targeted_v_not == "Other (salient but unclear)" ~
        "Other (salient but unclear)",
      valence_targeted_v_not == "More targeting is good" ~
        "More targeting is good",
      # every remaining case is coded as the opposite valence
      TRUE ~ "Less targeting is good"
    ),
    valence_impersonalgood_bin =
      valence_impersonalgood_combined == "Impersonal is good",
    valence_targetgood_bin =
      valence_targetgood_combined == "More targeting is good"
  )

# Stack the LLM-coded and hand-coded observations, keeping only the columns
# the two sources share for the salience / valence summaries.
llmhand_qual_valid <- bind_rows(llm_coded_in, combined_qual_valid) %>%
  select(all_of(c(
    "CaseId", "condition", "choice",
    "salience_targeting", "salience_impersonal_personal",
    "valence_targetgood_bin", "valence_impersonalgood_bin",
    "valence_targetgood_combined", "valence_impersonalgood_combined"
  )))

# Missing-value count per column (inspection only)
colSums(is.na(llmhand_qual_valid))

# Salience: share of responses per condition in which each dimension
# (targeting, impersonality) was coded "Salient".
salience_llm <- llmhand_qual_valid %>%
  select(salience_targeting, condition, salience_impersonal_personal,
         CaseId) %>%
  # long format: one row per response x salience variable
  reshape2::melt(id.vars = c("condition", "CaseId")) %>%
  group_by(condition, variable) %>%
  summarise(perc = sum(value == "Salient") / n()) %>%
  ungroup() %>%
  mutate(attribute = ifelse(grepl("targeting", variable),
                            "Degree of Targeting", "Impersonality"),
         outcome = "Salience")

## Valence: among responses where the dimension is salient and its valence
## is clear (the "Other (salient but unclear)" label is excluded), the share
## holding the "good" valence, by condition and by the method the
## respondent rated fairer.
valence_targeting_llm <- llmhand_qual_valid %>%
  filter(salience_targeting == "Salient" &
           valence_targetgood_combined != "Other (salient but unclear)") %>%
  select(condition, CaseId, choice, valence_targetgood_bin) %>%
  reshape2::melt(id.vars = c("condition", "CaseId", "choice")) %>%
  group_by(condition, variable, choice) %>%
  summarise(perc = mean(value)) %>%
  ungroup() %>%
  mutate(attribute = "Degree of Targeting",
         outcome = "Valence")


valence_impersonal_llm <- llmhand_qual_valid %>%
  filter(salience_impersonal_personal == "Salient" &
           valence_impersonalgood_combined !=
             "Other (salient but unclear)") %>%
  select(condition, CaseId, choice, valence_impersonalgood_bin) %>%
  reshape2::melt(id.vars = c("condition", "CaseId", "choice")) %>%
  group_by(condition, variable, choice) %>%
  summarise(perc = mean(value)) %>%
  ungroup() %>%
  mutate(attribute = "Impersonality",
         outcome = "Valence")

## Stack the salience and valence summaries into one plotting table.
## Salience rows have no `choice` column, so it is NA for them.
## combined_group labels each attribute x outcome pair, and
## combined_group_order fixes the facet order.
valence_salience_forplot_llm <- bind_rows(
  salience_llm,
  rbind.data.frame(valence_impersonal_llm, valence_targeting_llm)
) %>%
  select(-variable) %>%
  arrange(condition, choice, attribute) %>%
  mutate(
    combined_group = case_when(
      outcome == "Salience" & attribute == "Degree of Targeting" ~
        "Targeting salience",
      outcome == "Salience" & attribute == "Impersonality" ~
        "Impersonality salience",
      outcome == "Valence" & attribute == "Degree of Targeting" ~
        "Degree of Targeting: More targeting is good",
      outcome == "Valence" & attribute == "Impersonality" ~
        "Impersonality: More impersonal is good"
    ),
    combined_group_order = factor(
      combined_group,
      levels = c("Targeting salience",
                 "Degree of Targeting: More targeting is good",
                 "Impersonality salience",
                 "Impersonality: More impersonal is good"),
      ordered = TRUE
    )
  )


## Salience panel: horizontal bars per condition, one facet per attribute
salience_plot_llm <- ggplot(
  filter(valence_salience_forplot_llm, outcome == "Salience"),
  aes(x = condition, y = perc)
) +
  geom_col(position = "dodge", color = "black", fill = "wheat4") +
  facet_wrap(~attribute) +
  coord_flip() +
  ylim(0, 1) +
  labs(x = "",
       y = "Proportion responses where dimension is salient",
       title = "Salience") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_text(hjust = 1),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 16))
salience_plot_llm


## Valence panel: bars per condition, dodged by the method the respondent
## rated fairer, one facet per attribute
valence_plot_llm <- ggplot(
  filter(valence_salience_forplot_llm, outcome == "Valence"),
  aes(x = condition, y = perc, fill = choice, group = choice)
) +
  geom_col(position = "dodge", color = "black") +
  facet_wrap(~combined_group_order) +
  coord_flip() +
  scale_fill_manual(
    values = c("Algorithm" = friendly_pal("vibrant_seven")[1],
               "Status quo method" = "gray")
  ) +
  ylim(0, 1) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(x = "",
       y = "Proportion of responses agreeing",
       fill = "Method respondent rated fairer",
       title = "Valence") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_text(hjust = 1),
        axis.ticks.y = element_blank(),
        strip.text.x = element_text(size = 16))
valence_plot_llm

## Stack salience above valence and save as one figure.
## ggsave() picks the arranged figure up through last_plot().
ggarrange(salience_plot_llm, valence_plot_llm,
          nrow = 2,
          heights = c(4, 6))

ggsave(filename = paste0(FIG_DIR, "qual_ratings_allobs.pdf"),
       plot = last_plot(),
       device = "pdf",
       width = 14,
       height = 12)

#####################################################
###### Main text - changes after bias update - need wide format x and xend 
#####################################################

## Weighted share rating the algorithm as fairer before
## (derived_alg_morefair) and after (derived_alg_morefair_poststatus) the
## update, by group and status quo condition.
##
## data:      data frame containing the outcome, weight, condition, and
##            grouping columns
## groupname: name (string) of the grouping column
## Returns one row per group x condition with columns `category`,
## `derived_statusquo_cond`, `prop_algmorefair`, `prop_algmorefair_post`.
## Only rows with a non-missing post-update response are used.
compare_before_after <- function(data, groupname){

  has_post <- filter(data, !is.na(derived_alg_morefair_poststatus))

  has_post %>%
    # the grouping column is named `category` directly
    group_by(category = .data[[groupname]], derived_statusquo_cond) %>%
    summarise(
      prop_algmorefair = weighted.mean(derived_alg_morefair,
                                       w = derived_comb_weight),
      prop_algmorefair_post = weighted.mean(derived_alg_morefair_poststatus,
                                            w = derived_comb_weight)
    ) %>%
    ungroup()

}

## Before/after comparison for each demographic grouping variable
all_status_comparisons <- lapply(
  c("derived_educ_3cat",
    "derived_polideo_category",
    "derived_race",
    "derived_income"),
  compare_before_after,
  data = df_analytic
)

## Stack the per-variable results; drop_support is the before-minus-after
## difference in the share rating the algorithm as fairer
all_status_comparisons_df <- do.call(rbind.data.frame,
                                     all_status_comparisons) %>%
  mutate(drop_support = prop_algmorefair - prop_algmorefair_post)

## dumbbell plot of the before vs. after proportions for every category,
## one panel per status quo condition
## NOTE(review): the plotted summaries come from compare_before_after(), which
## filters only on a non-missing post-update rating -- confirm whether this
## is meant to be restricted to those who initially rated the algorithm fairer
ggplot(filter(all_status_comparisons_df, !is.na(category)),
       aes(x = prop_algmorefair,
           xend = prop_algmorefair_post,
           y = reorder(category, prop_algmorefair))) +
  ## x and xend are inherited from the plot-level mapping above
  geom_dumbbell(color = "wheat4",
                colour_x = "wheat4",
                colour_xend = "firebrick",
                size = 4.0,
                dot_guide = TRUE,
                dot_guide_size = 0.15) +
  facet_wrap(~derived_statusquo_cond) +
  xlim(0, 1) +
  xlab("Proportion algorithm more fair: before status update (grey dot)\n versus after status update (red dot)") +
  ylab("") +
  theme_new(base_size = 16) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 8))

## repeat not separating by status quo condition
## Same summary as compare_before_after(), but pooled over the status quo
## conditions: weighted proportion rating the algorithm as more fair before
## and after the status update, by group only.
##
## data:      data frame holding the column named in `groupname` along with
##            derived_alg_morefair, derived_alg_morefair_poststatus and
##            derived_comb_weight
## groupname: string naming the column to group by
##
## Returns one row per group level, with the grouping column renamed to
## `category`.
compare_before_after_pooled <- function(data, groupname){

  group_col <- sym(groupname)

  ## only rows with a non-missing post-update rating are summarised
  rated_post <- filter(data, !is.na(derived_alg_morefair_poststatus))

  pooled <- rated_post %>%
    group_by(!!group_col) %>%
    summarise(
      prop_algmorefair = weighted.mean(derived_alg_morefair,
                                       w = derived_comb_weight),
      prop_algmorefair_post = weighted.mean(derived_alg_morefair_poststatus,
                                            w = derived_comb_weight)
    )

  rename(ungroup(pooled), category = !!group_col)

}

## pooled before/after summaries for each demographic variable; each element
## of the vector is passed to compare_before_after_pooled() as `groupname`
all_status_comparisons_pooled <- lapply(
  c("derived_educ_3cat",
    "derived_polideo_category",
    "derived_race",
    "derived_income"),
  compare_before_after_pooled,
  data = df_analytic
)

## stack the per-variable summaries and add the before-minus-after change
all_status_comparisons_df_pooled <- do.call(rbind.data.frame,
                                            all_status_comparisons_pooled)
all_status_comparisons_df_pooled <- mutate(
  all_status_comparisons_df_pooled,
  drop_support = prop_algmorefair - prop_algmorefair_post
)

# overall before/after proportions and their difference, pooling every
# respondent with a non-missing post-update rating
all_status_overall <- summarise(
  filter(df_analytic, !is.na(derived_alg_morefair_poststatus)),
  prop_algmorefair = weighted.mean(derived_alg_morefair,
                                   w = derived_comb_weight),
  prop_algmorefair_post = weighted.mean(derived_alg_morefair_poststatus,
                                        w = derived_comb_weight),
  # later summary expressions can use the two proportions computed above
  drop_support = prop_algmorefair - prop_algmorefair_post
)


## Among respondents who initially rated the algorithm as more fair (and have
## a non-missing post-update rating), the mean of
## derived_other_morefair_poststatus within each level of a grouping variable.
##
## data:      data frame holding the column named in `groupname` along with
##            derived_alg_morefair, derived_alg_morefair_poststatus and
##            derived_other_morefair_poststatus
## groupname: string naming the column to group by
##
## Returns one row per group level with columns `category` and `change_mind`.
## NOTE(review): this mean is unweighted, whereas the before/after summaries
## elsewhere in this script weight by derived_comb_weight -- confirm intended.
prop_changemind <- function(data, groupname){

  group_col <- sym(groupname)

  initially_fairer <- filter(
    data,
    !is.na(derived_alg_morefair_poststatus) & derived_alg_morefair
  )

  changed <- summarise(
    group_by(initially_fairer, !!group_col),
    change_mind = mean(derived_other_morefair_poststatus)
  )

  rename(ungroup(changed), category = !!group_col)

}

## change-of-mind proportions for each demographic variable; each element of
## the vector is passed to prop_changemind() as `groupname`
all_changemind <- lapply(
  c("derived_educ_3cat",
    "derived_polideo_category",
    "derived_race",
    "derived_income"),
  prop_changemind,
  data = df_analytic
)

all_changemind_df <- do.call(rbind.data.frame, all_changemind) %>%
  mutate(
    # everyone in this subset initially rated the algorithm as fairer
    prop_algmorefair = 1,
    prop_algmorefair_post = 1 - change_mind,
    # relabel groups to make consistent
    category = case_when(grepl("liberal", category) ~ "Liberal",
                         grepl("conservative", category) ~ "Conservative",
                         grepl("Moderate", category) ~ "Moderate",
                         grepl("Other", category) ~ "2+ races or other",
                         TRUE ~ category),
    category = str_replace(category, "NH", "non-Hispanic")
  )

# overall change-of-mind proportion, pooling every respondent who initially
# rated the algorithm as fairer and has a non-missing post-update rating
all_changemind_overall <- summarise(
  filter(df_analytic,
         !is.na(derived_alg_morefair_poststatus) & derived_alg_morefair),
  change_mind = mean(derived_other_morefair_poststatus)
)

# right-hand panel of the two-panel figure (initial fairness ratings versus
# recommending against the algorithm after the bias update): horizontal bars
# of change_mind per category, ordered by change_mind, with the y-axis text
# hidden
propchange_plot <- ggplot(all_changemind_df %>% filter(!is.na(category)),
       aes(x = change_mind, y = 
             reorder(category, change_mind))) +
  geom_bar(stat = "identity", 
           color = "black",
           fill = "wheat4") +
  # dotted reference line at the overall (pooled) proportion; line width is
  # set with `linewidth`, since `size` is deprecated for lines in
  # ggplot2 >= 3.4.0 (this file already uses `linewidth` in element_line)
  geom_vline(xintercept = all_changemind_overall$change_mind[1],
             color = "black", linetype = "dotted", linewidth = 0.9) + 
  ylab("") +
  theme_new(base_size = 18) +
  theme(legend.position = "bottom",
        legend.background = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.subtitle=element_text(size=16, face='italic'),
        panel.grid.major.y = element_line(color = "gray", linetype = 2, linewidth = 0.2)) +
  xlab("Proportion recommend school district\nNOT use the algorithm") + 
  labs(title='Proportion recommend against algorithm post-update', subtitle='Among those who initially rated algorithm as fairer') + 
  # value labels drawn in white, nudged inside the end of each bar
  geom_label(aes(y = factor(category), x = change_mind,
                 label = round(change_mind, 2)),
             color = "white", fill = NA, label.size = 0,
             position = position_dodge(width = 0.5), hjust = 1.25,
             size = 6) +
  xlim(0,1)

propchange_plot 

## left-hand panel: initial (pre-update) proportion rating the algorithm as
## fairer, pooled across status quo conditions, with categories ordered by
## the change-of-mind proportion from all_changemind_df
propfairer_plot <- all_status_comparisons_df_pooled %>%
  filter(!is.na(category)) %>%
  # relabel groups to make consistent
  mutate(category = case_when(grepl("liberal", category) ~ "Liberal",
                              grepl("conservative", category) ~ "Conservative",
                              grepl("Moderate", category) ~ "Moderate",
                              grepl("Other", category) ~ "2+ races or other",
                              TRUE ~ category),
         category = str_replace(category, "NH", "non-Hispanic")) %>%
  # bring in change_mind so bars can be ordered by proportion change
  left_join(select(filter(all_changemind_df, !is.na(category)),
                   -prop_algmorefair, -prop_algmorefair_post),
            by = "category") %>%
  arrange(change_mind) %>%
  mutate(category = factor(category, levels = unique(category))) %>%
  ggplot(aes(x = category, y = prop_algmorefair)) +
  geom_bar(stat = "identity", color = "black", fill  = "#33BBEE") +
  # x and y are inherited from the plot-level mapping
  geom_label(aes(label = round(prop_algmorefair, 2)),
             color = "white", fill = NA, label.size = 0,
             position = position_dodge(width = 0.5), hjust = 1.25,
             size = 6) +
  coord_flip() +
  ylim(0, 1) +
  xlab("") +
  ylab("Proportion who think algorithm is fairer\n") +
  labs(title = 'Initial views on the algorithm',
       subtitle = 'Before update about algorithmic bias') +
  theme_new(base_size = 18) +
  theme(legend.background = element_blank(),
        axis.text.y = element_text(hjust = 1),
        axis.ticks.y = element_blank(),
        plot.subtitle=element_text(size=16, face='italic'),
        panel.grid.major.y = element_line(color = "gray", linetype = 2, linewidth = 0.2))

propfairer_plot

## place the initial-views and change-of-mind panels side by side and display
update_twopanel_plot <- ggarrange(propfairer_plot, propchange_plot,
                                  ncol = 2,
                                  widths = c(6, 5))
update_twopanel_plot

## save the figure
## - the arranged plot is passed explicitly instead of relying on last_plot(),
##   so a plot created in between can never be written by mistake
## - file.path() builds a valid path whether or not FIG_DIR ends in a
##   path separator
ggsave(file.path(FIG_DIR, "update_twopanel.pdf"),
       plot = update_twopanel_plot,
       device = "pdf",
       width = 18,
       height = 8)
  

  
#####################################################
###### Supplement on time distribution of responses
#####################################################

### timing of responses: reference dates for the debate and the election
debate <- as.Date("2021-09-29")
election <- as.Date("2021-11-02")

## derived_date_day: the text before the first whitespace in STARTDT, parsed
## as month/day/year; derived_postelection: TRUE when that day is strictly
## after the election date (NA when the day is missing)
df_analytic <- mutate(
  df_analytic,
  derived_date_day = as.Date(gsub("\\s+.*", "",  STARTDT),
                             format = "%m/%d/%Y"),
  derived_postelection = derived_date_day > election
)


library(zoo)
## daily summary among respondents in the "Parent requests" condition with a
## non-missing political ideology: response counts, share conservative, the
## share with derived_other_morefair among conservatives and among liberals,
## their gap, and a 5-row rolling mean of that gap
time_choose_fair <- df_analytic %>%
  filter(derived_statusquo_cond == "Parent requests" & !is.na(derived_polideo_category)) %>%
  # helper flags so the ideology comparisons are written only once
  mutate(is_cons = derived_polideo_category == "Slightly - extremely conservative",
         is_lib = derived_polideo_category == "Slightly - extremely liberal") %>%
  group_by(derived_date_day) %>%
  summarise(count_responses = n(),
            count_cons = sum(is_cons),
            prop_cons_dayrespondents = count_cons / count_responses,
            prop_par_cons = sum(derived_other_morefair[is_cons]) / sum(is_cons),
            prop_par_lib = sum(derived_other_morefair[is_lib]) / sum(is_lib)) %>%
  # the rolling mean runs over consecutive rows (days present in the data)
  mutate(size_gap = prop_par_cons - prop_par_lib,
         smoothed_size_gap = zoo::rollmean(size_gap, k = 5, fill = NA))


## bar chart of daily response counts with dashed red lines at the debate and
## election dates
## NOTE(review): time_choose_fair is restricted to the "Parent requests"
## condition with non-missing ideology, so these counts cover that subset only
time_dist_plot <- ggplot(time_choose_fair,
                         aes(x = derived_date_day, y = count_responses)) +
  geom_bar(stat = "identity", color = "black", fill = "wheat4") +
  geom_vline(xintercept = c(debate, election), 
             linetype = "dashed", 
             color = "red") +
  scale_x_date(date_breaks = "4 days") +
  theme_new(base_size = 24) +
  theme(axis.text.x = element_text(angle = 90, size = 12)) +
  xlab("Date survey taken") +
  ylab("Count of respondents")
time_dist_plot


## save the figure
## - the plot is passed explicitly instead of relying on last_plot()
## - file.path() builds a valid path whether or not FIG_DIR ends in a
##   path separator
ggsave(file.path(FIG_DIR, "time_dist_responses.pdf"),
       plot = time_dist_plot,
       device = "pdf",
       width = 12,
       height = 8)


